## title:   Narcissism in Political Participation
## authors: Z. Fazekas & P.K. Hatemi
## study:   Denmark 2011
## goal:    clean and prepare covariates
##          clean and prepare outcome variables
##          clean and prepare narcissism variables
##          measurement summary
## ----
# 0. packages and helper --
library("psych")
library("dplyr")
library("stringr")
library("semTools")

## 1. load helpers and data
## helper.R is sourced before anything else runs; `two_sd()`, used for the
## 2SD standardization near the end, is not defined in this file and
## presumably comes from there (verify against helper.R).
source(file = "helper.R")
## survey extract; all later steps modify this data frame in place
dk <- read.csv(file = "./data/dk11.csv")

# dk <- dplyr::select(dk,
#                     v2011_indid, v2011_koen1, v2011_alder, v2011_sidstuddan_ds,
#                     v2011_q57, v2011_fv11_01,
#                     starts_with("v2011_q71"), starts_with("v2011_q47b"))
# 
# write.csv(dk, file = "./data/dk11.csv", row.names = FALSE)

## 2. covariates
## New columns are appended in the order female, age, edu, edu_cat, interest.
## The column-range selection done before export depends on that order.

## gender dummy: original 1 = male -> 0, 2 = female -> 1, anything else NA
dk[["female"]] <- c(0, 1)[match(dk[["v2011_koen1"]], c(1, 2))]

## age (just renaming)
dk[["age"]] <- dk[["v2011_alder"]]

## education (last completed education), original code minus 1
dk[["edu"]] <- dk[["v2011_sidstuddan_ds"]] - 1
## higher education dummy: 1 when edu is above 4, 0 otherwise
dk[["edu_cat"]] <- ifelse(dk[["edu"]] > 4, 1, 0)

## interest in politics: |x - 4| maps original codes 1, 2, 3, 4 to 3, 2, 1, 0
dk[["interest"]] <- abs(dk[["v2011_q57"]] - 4)

## the raw source columns are no longer needed once the recodes exist
dk[c("v2011_koen1", "v2011_alder", "v2011_sidstuddan_ds", "v2011_q57")] <- NULL


## 3. participation
## shorter (8-item) participation battery
part_vars <- c("v2011_q71_1", "v2011_q71_2", "v2011_q71_3",
               "v2011_q71_4", "v2011_q71_5", "v2011_q71_6",
               "v2011_q71_7", "v2011_q71_9")

## All items are reverse coded (see codebook); missing is system missing.
## |x - 4| flips the direction (1..4 -> 3..0) and dividing by 3 rescales
## the result to the range [0-1].
dk[, part_vars] <- abs(dk[, part_vars] - 4) / 3

## setting up confirmatory FA model: one factor, all eight items
part.factor <- 'part_1 =~ v2011_q71_5 + v2011_q71_7 + v2011_q71_4 + v2011_q71_9 +
                          v2011_q71_1 + v2011_q71_2 + v2011_q71_3 +
                          v2011_q71_6'
## fitting the model
## with fixing the variance of the factors (not first loading to 1)
part.factor.fit <- cfa(part.factor, data = dk, std.lv = TRUE)
summary(part.factor.fit, fit.measures = TRUE) # simple summary of object

## Within-individual mean participation score. The divisor is taken from
## part_vars instead of a literal 8, so it cannot drift out of sync with the
## item list. rowSums() has no na.rm here, so any missing item gives NA.
dk$part_full <- rowSums(dk[, part_vars]) / length(part_vars)


## narcissism --
## based on codebook, items 5, 6 and 11 need reversal; the reversed copies
## (1 - x) get an "r" suffix and the original columns are left untouched
dk[paste0("v2011_q47b_", c(5, 6, 11), "r")] <-
  1 - dk[paste0("v2011_q47b_", c(5, 6, 11))]

# order in codebook / check 3 factor solution with "close available"

## 15 (all) item check: survey columns in questionnaire order, using the
## reversed versions of items 5, 6 and 11
npi_vars <- paste0("v2011_q47b_",
                   c(1:4, "5r", "6r", 7:10, "11r", 12:15)) # narc 15-item

## item number in the full npi questionnaire for each of the 15 survey
## items, position by position (survey item 1 -> npi_1, ..., item 15 -> npi_14)
npi_add <- paste0("npi_", c(1, 6, 11, 16, 22, 23, 24, 27,
                            29, 31, 32, 36, 37, 39, 14))

## copy the items under their npi_* names; the source columns stay in place
dk[npi_add] <- dk[npi_vars]

## item sets for the three-subscale version (redefined after the first CFA)
lead_auth   <- paste0("npi_", c(1, 11, 27, 32, 36))
grand_exhib <- paste0("npi_", c(29, 37, 6))
ent_exp     <- paste0("npi_", c(14, 24, 22, 39))
npi25_vars  <- c(lead_auth, grand_exhib, ent_exp)

# 3-factor structure

# first model: all five leadership/authority items, npi_29 alone for
# grandiose/exhibitionism (indicator residual variance fixed to 0, factor
# variance fixed to 1), and npi_14 + npi_24 for entitlement/exploitativeness
npi.3factor <- 'lead_auth =~ npi_1 + npi_11 + npi_27 + npi_32 + npi_36
                  grand_exh =~ npi_29
                  npi_29 ~~ 0*npi_29
                  grand_exh ~~ 1*grand_exh
                  ent_exp   =~ npi_14 + npi_24'
# items are declared as ordered when fitting
npi.3factor.fit <- cfa(model = npi.3factor, data = dk, ordered = npi25_vars)
summary(npi.3factor.fit, standardized = TRUE, fit.measures = TRUE)

# redefine the item sets so they match the model actually fitted above
lead_auth   <- paste0("npi_", c(1, 11, 27, 32, 36))
grand_exhib <- "npi_29"
ent_exp     <- paste0("npi_", c(14, 24))
npi25_vars  <- c(lead_auth, grand_exhib, ent_exp)

# reliability: leadership/authority items first, then all retained items
psych::alpha(dk[lead_auth])
psych::alpha(dk[npi25_vars])

# 7-factor model; CFA
# NOTE(review): the syntax below defines six latent variables (auth, exp, suf,
# ent, van, sup). No exhibitionism factor is specified, although the loadings
# table built further down reserves an "Exhibitionism" level -- confirm.
# From here on npi_vars refers to the npi_* column names, not the raw
# v2011_q47b_* names.
npi_vars <- npi_add
# van and sup each have a single indicator: the indicator's residual variance
# is fixed to 0 and the factor variance to 1 via the 0* / 1* modifiers.
npi.factor <-   'auth =~ npi_1 + npi_11 + npi_32 + npi_36
  exp =~ npi_6 + npi_16 + npi_23
  suf =~ npi_22 + npi_31 + npi_39
  ent =~ npi_14 + npi_24 + npi_27
  van =~ npi_29
  npi_29 ~~ 0*npi_29
  van ~~ 1*van
  sup =~ npi_37
  npi_37 ~~ 0*npi_37
  sup ~~ 1*sup'
# Fit on the npi_* columns only, with every item declared as ordered.
npi.factor.fit <- cfa(npi.factor, 
                      data = dk[, npi_vars], 
                      ordered = npi_vars)
summary(npi.factor.fit, fit.measures = TRUE)


# extract factor loadings
# Keep the loading rows (operator "=~") and the four columns needed for the
# table. Rows and columns are selected by content/name rather than by
# position, so the table stays correct if the model syntax is edited.
fit_40 <- data.frame(standardizedSolution(npi.factor.fit))
fit_40 <- fit_40[fit_40$op == "=~", c("lhs", "rhs", "est.std", "se")]
fit_40[, c("est.std", "se")] <- round(fit_40[, c("est.std", "se")], 3)

# A standard error that rounds to 0 is reported as 0.001, except where the
# standardized loading is exactly 1 (the single-indicator factors).
# The column is spelled out in full: `$est` only worked through partial
# matching of `est.std`.
fit_40$se[fit_40$se == 0.000 & fit_40$est.std != 1] <- 0.001
fit_40$rhs <- stringr::str_replace(fit_40$rhs, "npi_", "Item")

# Map factor names from the model syntax to display labels with an exact
# lookup. This avoids a chain of substring replacements whose result depends
# on the order of the calls.
facet_labels <- c(auth = "Authority",
                  sup  = "Superiority",
                  exp  = "Exploitativeness",
                  ent  = "Entitlement",
                  van  = "Vanity",
                  suf  = "Self-sufficiency")
fit_40$lhs <- unname(facet_labels[as.character(fit_40$lhs)])

# Display order of the facets. "Exhibitionism" is kept as a level although
# the model fitted above defines no such factor, so it remains empty.
fit_40$lhs <- factor(fit_40$lhs, levels = c("Authority",
                                            "Exhibitionism",
                                            "Superiority",
                                            "Exploitativeness",
                                            "Entitlement",
                                            "Vanity",
                                            "Self-sufficiency"))

# table cell: "loading (se)"
fit_40 <- mutate(fit_40, label = paste0(est.std, " (", se, ")"))
# the result stays grouped by facet, as before
fit_40_dk11 <- fit_40 %>%
  group_by(lhs) %>%
  dplyr::select(lhs, rhs, label)

# 3-factor
# Same table as for the facet model: keep the loading rows (operator "=~")
# and the four needed columns, selected by content/name instead of position.
fit_25 <- data.frame(standardizedSolution(npi.3factor.fit))
fit_25 <- fit_25[fit_25$op == "=~", c("lhs", "rhs", "est.std", "se")]
fit_25[, c("est.std", "se")] <- round(fit_25[, c("est.std", "se")], 3)

# A standard error that rounds to 0 is reported as 0.001, except where the
# standardized loading is exactly 1. The column is spelled out in full:
# `$est` only worked through partial matching of `est.std`.
fit_25$se[fit_25$se == 0.000 & fit_25$est.std != 1] <- 0.001
fit_25$rhs <- stringr::str_replace(fit_25$rhs, "npi_", "Item")

# exact lookup from factor names in the model syntax to display labels
subscale_labels <- c(lead_auth = "Leadership/Authority",
                     grand_exh = "Grandiose/Exhibitionism",
                     ent_exp   = "Entitlement/Exploitativeness")
fit_25$lhs <- unname(subscale_labels[as.character(fit_25$lhs)])
fit_25$lhs <- factor(fit_25$lhs, levels = c("Leadership/Authority",
                                            "Grandiose/Exhibitionism",
                                            "Entitlement/Exploitativeness"))

# table cell: "loading (se)"
fit_25 <- mutate(fit_25, label = paste0(est.std, " (", se, ")"))
# the result stays grouped by subscale, as before
fit_25_dk11 <- fit_25 %>%
  group_by(lhs) %>%
  dplyr::select(lhs, rhs, label)

# show both loading tables
fit_25_dk11
fit_40_dk11

# create and recode narcissism subfacets
# Item sets per facet, each followed by Cronbach's alpha for the multi-item
# sets; the trailing numbers are the alpha values recorded by the authors.
aut_vars <- paste0("npi_", c(1, 11, 32, 36)) # Authority
psych::alpha(dk[aut_vars]) # 0.56
exp_vars <- paste0("npi_", c(6, 16, 23)) # Exploitativeness
psych::alpha(dk[exp_vars]) # 0.19
suf_vars <- paste0("npi_", c(22, 31, 39)) # Self-sufficiency
psych::alpha(dk[suf_vars]) # 0.21
ent_vars <- paste0("npi_", c(14, 24, 27)) # Entitlement
psych::alpha(dk[ent_vars]) # 0.38
van_vars <- "npi_29" # Vanity
sup_vars <- "npi_37" # Superiority
psych::alpha(dk[npi_vars]) # 0.63

# Scores are item means (row sum over number of items); a respondent with
# any missing item gets NA. Single-item facets are copied as they are.
dk[["npi"]]        <- rowSums(dk[npi_vars]) / length(npi_vars)
dk[["authority"]]  <- rowSums(dk[aut_vars]) / length(aut_vars)
dk[["exploit"]]    <- rowSums(dk[exp_vars]) / length(exp_vars)
dk[["entitle"]]    <- rowSums(dk[ent_vars]) / length(ent_vars)
dk[["sufficient"]] <- rowSums(dk[suf_vars]) / length(suf_vars)
dk[["vanity"]]     <- dk[[van_vars]]
dk[["superior"]]   <- dk[[sup_vars]]

# store out narcissism variable names
narc_vars <- c("npi", "authority", "superior", "exploit",
               "entitle", "vanity", "sufficient")

# same construction for the three-subscale version
dk[["npi25"]]      <- rowSums(dk[npi25_vars]) / length(npi25_vars)
dk[["leadauth"]]   <- rowSums(dk[lead_auth]) / length(lead_auth)
dk[["entexp"]]     <- rowSums(dk[ent_exp]) / length(ent_exp)
dk[["grandexhib"]] <- dk[[grand_exhib]]

# store out narcissism variable names
narc25_vars <- c("npi25", "leadauth", "grandexhib", "entexp")
           
# means/SD 
# apply(dk[, narc_vars], 2, function(x) paste0(round(mean(x, na.rm = TRUE), 3),
#                                                         " (", round(sd(x, na.rm = TRUE), 3), ")"))
#            
# apply(dk[, narc25_vars], 2, function(x) paste0(round(mean(x, na.rm = TRUE), 3),
#                                                           " (", round(sd(x, na.rm = TRUE), 3), ")"))


## variable recoding and export for regression

## turnout dummy: original code 2 -> 0, 1 -> 1, anything else stays NA
dk$turnout <- NA
dk$turnout[dk$v2011_fv11_01 == 2] <- 0
dk$turnout[dk$v2011_fv11_01 == 1] <- 1

## Keep the id, the covariates up to part_full and the scores from npi to
## turnout. The two ranges follow the order in which the columns were created.
dk <- dplyr::select(dk, v2011_indid, female:part_full, npi:turnout)

## all political variables (potential dv) recoded [0, 1]
pol_vars <- c("interest")
dk[, pol_vars] <- dk[, pol_vars] / 3

## 2SD standardization of variables of interest
narc_comp <- c("npi", "authority", "exploit", "sufficient",
               "entitle", "vanity", "superior",
               narc25_vars)
## vanity, superior and grandexhib are single-item scores, kept 0/1.
## They are excluded by name rather than by position, so the exclusion stays
## correct if narc_comp is reordered or extended.
single_item <- c("vanity", "superior", "grandexhib")
to_recode <- setdiff(c(narc_comp, "age"), single_item)
rec_vars  <- paste0(to_recode, "_sd")

## two_sd() is not defined in this file (presumably in helper.R, sourced at
## the top); it is applied column by column.
dk[, rec_vars] <- apply(dk[, to_recode], 2, two_sd)

## single-item scores get a *_sd copy without rescaling
dk[paste0(single_item, "_sd")] <- dk[single_item]

## add study ID
dk$study <- "DK11"

## export dataset for regression
write.csv(dk, file = "./data/dk11-reg.csv", row.names = FALSE)


# R version 3.5.1 (2018-07-02)
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS  10.15.4
# 
# Matrix products: default
# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] semTools_0.5-1 lavaan_0.6-3   stringr_1.4.0  dplyr_0.8.3    psych_1.8.12  
# 
# loaded via a namespace (and not attached):
#   [1] Rcpp_1.0.2       rstudioapi_0.10  magrittr_1.5     tidyselect_0.2.5 mnormt_1.5-5     pbivnorm_0.6.0   lattice_0.20-35 
# [8] R6_2.4.0         rlang_0.4.2      fansi_0.4.0      tools_3.5.1      parallel_3.5.1   grid_3.5.1       nlme_3.1-137    
# [15] utf8_1.1.4       cli_1.1.0        yaml_2.2.0       assertthat_0.2.1 tibble_2.1.3     crayon_1.3.4     purrr_0.3.3     
# [22] vctrs_0.2.1      zeallot_0.1.0    glue_1.3.1       stringi_1.4.3    compiler_3.5.1   pillar_1.4.2     backports_1.1.5 
# [29] stats4_3.5.1     foreign_0.8-70   pkgconfig_2.0.3 

